TODO

  • Compute scores after dropping f < 0.5

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline
import pandas as pd
import scipy

In [2]:
import sklearn.metrics
import mir_eval

In [3]:
import cPickle as pickle

In [4]:
from glob import glob
import re

In [28]:
pd.set_option('precision', 4)
pd.set_option('max_rows', 2000)

In [6]:
np.set_printoptions(precision=3)
seaborn.set(style='darkgrid')

In [7]:
def plot_curve(file='', intervals=None, labels=None, scores=None, norm=None, min_score=0.0, **kwargs):
    
    file_name = file
    
    label_agreement = np.zeros((len(labels), len(labels)), dtype=bool)
    
    for i in range(len(labels)):
        for j in range(i, len(labels)):
            label_agreement[i, j] = (labels[i] == labels[j])
            label_agreement[j, i] = label_agreement[i, j]
    
    time_norm = 1
    
    durations = np.diff(intervals, axis=1).ravel()
    
    if norm == 'min':
        time_norm = np.minimum.outer(durations, durations)
        
    elif norm == 'max':
        time_norm = np.maximum.outer(durations, durations)
        
    elif norm == 'hmean':
        time_norm = 2./np.add.outer(durations, durations)
        time_norm *= np.multiply.outer(durations, durations)
    
    # TODO: have the label agreement index out nan-valued scores
    
    scores = scores / time_norm
    
    label_agreement[np.tril_indices_from(label_agreement, k=0)] = False
    
    label_agreement[~np.isfinite(scores)] = False
    
    label_disagreement = ~label_agreement
    
    label_disagreement[np.tril_indices_from(label_disagreement, k=0)] = False
    
    label_disagreement[~np.isfinite(scores)] = False
    
    tp_scores = scores[label_agreement]
    fp_scores = scores[label_disagreement]
    
    num_pos = np.sum(label_agreement)
    num_neg = np.sum(label_disagreement)
    
    y_true = np.concatenate([np.zeros(len(tp_scores)), np.ones(len(fp_scores))])
    y_score = np.concatenate([tp_scores, fp_scores])
    
    fpr, tpr, thr = sklearn.metrics.roc_curve(y_true, y_score)
    
    tp = num_pos * tpr
    fp = num_neg * fpr
    
    precision = tp / (tp + fp)
    recall = tpr
    
    fmeasure = np.asarray([mir_eval.util.f_measure(p, r) for p, r in zip(precision, recall)])
    
    k = np.argmax(fmeasure)
    thr_opt = thr[k]
    
    plt.figure(figsize=(12, 4))
    plt.subplot(1,3,1)
    plt.plot([0, 1], [0, 1], linestyle='--', alpha=0.5)
    
    plt.plot(fpr, tpr)
    plt.plot(fpr[k], tpr[k], color='r', marker='*', markersize=10, alpha=0.5)
   
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.title(file_name)
    
    plt.subplot(1,3,2)
    plt.plot(recall, precision)
    plt.plot(recall[k], precision[k], marker='*', markersize=10, alpha=0.5, color='r')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('norm={}'.format(norm))
   
    plt.subplot(1,3,3)
    plt.plot(thr, fmeasure)
    k = np.argmax(fmeasure)
    plt.plot(thr[k], fmeasure[k], marker='*', markersize=10, alpha=0.5, color='r')
   
    plt.xlabel(r'$\theta$')
    plt.ylabel('$F_1$')
    plt.title(r'({:.3f}, {:.3f})'.format(thr[k], fmeasure[k]))
    plt.tight_layout()
    
    return thr[k], fmeasure[k]

In [15]:
def summarize_results(files):
    
    data = {}
    
    for fname in files:
        results = pickle.load(open(fname, 'r'))
        
        fscores = pd.DataFrame.from_dict([f['fmeasures'] for f in results['file_scores']
                                          if f['fmeasures'] is not None])
        
        match = re.match('.*scores_datasetE(?P<dataset>.*?)E(?P<scale>.*?)_distE(?P<metric>.*).pk', fname)

        fscores['dataset'] = pd.Series(match.group('dataset'), index=fscores.index)
        fscores['scale'] = pd.Series(match.group('dataset') + '_' + match.group('scale'), index=fscores.index)
        fscores['metric'] = pd.Series(match.group('metric'), index=fscores.index)
        
        data[fname] = fscores
        
    return data

In [16]:
output = summarize_results(sorted(glob('../data/*.pk')))

In [17]:
all_results = pd.concat(output.values())

In [31]:
all_results_ = all_results

In [32]:
all_results = all_results_[all_results_['none'] > 0.5]

In [33]:
all_results.groupby(['scale', 'metric']).hist(layout=(1,4), figsize=(12,2))


Out[33]:
scale                      metric     
Isophonics_level_function  L1             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           L2             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           correlation    [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
SALAMI_level_function      L1             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           L2             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           correlation    [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
SALAMI_level_large_scale   L1             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           L2             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           correlation    [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
SALAMI_level_small_scale   L1             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           L2             [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
                           correlation    [[Axes(0.125,0.125;0.158163x0.775), Axes(0.330...
dtype: object

In [34]:
print all_results.groupby(['scale', 'metric']).mean()


                                       hmean    max    min   none
scale                     metric                                 
Isophonics_level_function L1           0.778  0.783  0.779  0.936
                          L2           0.744  0.727  0.752  0.934
                          correlation  0.815  0.780  0.836  0.895
SALAMI_level_function     L1           0.741  0.694  0.751  0.824
                          L2           0.734  0.680  0.744  0.827
                          correlation  0.750  0.704  0.764  0.820
SALAMI_level_large_scale  L1           0.749  0.714  0.755  0.790
                          L2           0.748  0.707  0.752  0.788
                          correlation  0.764  0.735  0.769  0.813
SALAMI_level_small_scale  L1           0.696  0.674  0.702  0.717
                          L2           0.697  0.671  0.704  0.716
                          correlation  0.725  0.698  0.725  0.742

In [35]:
all_results.groupby(['scale', 'metric']).describe()


Out[35]:
hmean max min none
scale metric
Isophonics_level_function L1 count 284.000 280.000 287.000 287.000
mean 0.778 0.783 0.779 0.936
std 0.166 0.189 0.163 0.110
min 0.281 0.158 0.386 0.510
25% 0.652 0.660 0.664 0.923
50% 0.780 0.813 0.776 0.990
75% 0.933 0.957 0.939 1.000
max 1.000 1.000 1.000 1.000
L2 count 284.000 278.000 287.000 287.000
mean 0.744 0.727 0.752 0.934
std 0.168 0.189 0.168 0.111
min 0.262 0.158 0.262 0.537
25% 0.622 0.588 0.622 0.914
50% 0.738 0.754 0.748 0.989
75% 0.888 0.879 0.901 1.000
max 1.000 1.000 1.000 1.000
correlation count 273.000 232.000 274.000 274.000
mean 0.815 0.780 0.836 0.895
std 0.202 0.224 0.182 0.136
min 0.108 0.056 0.320 0.509
25% 0.685 0.616 0.719 0.813
50% 0.889 0.854 0.896 0.974
75% 1.000 0.989 1.000 1.000
max 1.000 1.000 1.000 1.000
SALAMI_level_function L1 count 460.000 446.000 459.000 465.000
mean 0.741 0.694 0.751 0.824
std 0.187 0.214 0.179 0.153
min 0.228 0.112 0.298 0.502
25% 0.593 0.536 0.608 0.694
50% 0.740 0.700 0.750 0.857
75% 0.915 0.877 0.919 0.974
max 1.000 1.000 1.000 1.000
L2 count 455.000 434.000 454.000 460.000
mean 0.734 0.680 0.744 0.827
std 0.193 0.217 0.184 0.152
min 0.228 0.103 0.290 0.500
25% 0.582 0.528 0.607 0.692
50% 0.731 0.683 0.746 0.864
75% 0.912 0.856 0.920 0.974
max 1.000 1.000 1.000 1.000
correlation count 437.000 379.000 442.000 445.000
mean 0.750 0.704 0.764 0.820
std 0.201 0.230 0.190 0.160
min 0.248 0.112 0.290 0.508
25% 0.599 0.524 0.623 0.674
50% 0.757 0.725 0.770 0.851
75% 0.947 0.916 0.947 0.979
max 1.000 1.000 1.000 1.000
SALAMI_level_large_scale L1 count 610.000 567.000 616.000 618.000
mean 0.749 0.714 0.755 0.790
std 0.189 0.192 0.181 0.155
min 0.116 0.138 0.244 0.500
25% 0.603 0.566 0.614 0.662
50% 0.750 0.711 0.757 0.789
75% 0.923 0.878 0.912 0.944
max 1.000 1.000 1.000 1.000
L2 count 615.000 572.000 622.000 623.000
mean 0.748 0.707 0.752 0.788
std 0.189 0.201 0.182 0.156
min 0.214 0.138 0.244 0.504
25% 0.602 0.552 0.610 0.657
50% 0.750 0.700 0.756 0.787
75% 0.919 0.880 0.912 0.947
max 1.000 1.000 1.000 1.000
correlation count 577.000 534.000 590.000 597.000
mean 0.764 0.735 0.769 0.813
std 0.192 0.203 0.186 0.161
min 0.138 0.085 0.235 0.503
25% 0.619 0.583 0.630 0.674
50% 0.766 0.744 0.770 0.826
75% 0.964 0.917 0.956 0.988
max 1.000 1.000 1.000 1.000
SALAMI_level_small_scale L1 count 454.000 437.000 456.000 463.000
mean 0.696 0.674 0.702 0.717
std 0.157 0.158 0.154 0.147
min 0.336 0.249 0.361 0.501
25% 0.573 0.558 0.584 0.592
50% 0.671 0.650 0.673 0.692
75% 0.795 0.776 0.801 0.822
max 1.000 1.000 1.000 1.000
L2 count 449.000 438.000 454.000 465.000
mean 0.697 0.671 0.704 0.716
std 0.158 0.162 0.155 0.147
min 0.345 0.249 0.360 0.501
25% 0.576 0.550 0.585 0.590
50% 0.675 0.650 0.675 0.692
75% 0.803 0.774 0.809 0.817
max 1.000 1.000 1.000 1.000
correlation count 483.000 466.000 491.000 500.000
mean 0.725 0.698 0.725 0.742
std 0.161 0.171 0.158 0.148
min 0.250 0.130 0.250 0.501
25% 0.604 0.572 0.608 0.618
50% 0.705 0.688 0.709 0.725
75% 0.857 0.830 0.847 0.869
max 1.000 1.000 1.000 1.000


In [14]:
vars().update(pickle.load(open('../data/scores_datasetESALAMI_levelEsmall_scale_distEcorrelation.pk', 'r')))

In [15]:
fscores = pd.DataFrame.from_dict([f['fmeasures'] for f in file_scores if f['fmeasures'] is not None])

In [16]:
fscores.describe()


Out[16]:
hmean max min none
count 668.000 652.000 664.000 665.000
mean 0.640 0.619 0.636 0.650
std 0.212 0.212 0.211 0.211
min 0.109 0.096 0.109 0.114
25% 0.483 0.461 0.474 0.502
50% 0.645 0.614 0.640 0.652
75% 0.806 0.778 0.793 0.811
max 1.000 1.000 1.000 1.000

In [17]:
k = 382
for norm in [None, 'min', 'max', 'hmean']:
    print plot_curve(norm=norm, **file_scores[k])


(0.31321433, 0.98245614035087714)
(0.019467199829822143, 0.6724202626641651)
(0.019174352437899125, 0.57854560064282845)
(0.032161135357831182, 0.63529411764705879)

In [19]:
plt.figure(figsize=(12,5))
seaborn.heatmap(file_scores[k]['scores'],
                yticklabels=file_scores[k]['labels'],
                xticklabels=file_scores[k]['labels'])
plt.tight_layout()



In [ ]: